import pandas as pd
# Notebook-wide registry: maps a short label (e.g. 'cell_type', 'exp', 'tile') to a table.
df = {}
from clustergrammer2 import net
import ipywidgets as widgets
import numpy as np
from bqplot import pyplot as plt
import bqplot
from glob import glob
# Load the cell-type annotation sheet; its first column becomes the row index.
df['cell_type'] = pd.read_excel('../data/big_data/mmc2__codex_cell_type_info.xlsx', index_col=0)
df['cell_type'].shape

# Map every row label of the annotation sheet to its cell-type name in one pass,
# instead of re-selecting the column and doing a `.loc` lookup for each row.
ct_dict = df['cell_type']['Imaging phenotype (cell type)'].to_dict()

# Distinct cell-type names in sorted order.
cell_types = sorted(set(ct_dict.values()))
print(len(cell_types))
cell_types

# Show which text / CSV files are available in the data directory.
glob('../data/big_data/*.txt')
glob('../data/big_data/*.csv')
# See http://welikesharingdata.blob.core.windows.net/forshare/index.html
%%time
# Read the expression table, then relabel every row as 'C-<original row label>'.
df['exp'] = pd.read_csv('../data/big_data/Suppl.Table2.CODEX_paper_MRLdatasetexpression.csv')
new_rows = ['C-' + row_label for row_label in map(str, df['exp'].index.tolist())]
df['exp'].index = new_rows

# Report the table size, then preview the first rows and the column names.
print(df['exp'].shape)
df['exp'].head()
df['exp'].columns.tolist()
# The slide name is the part of the tile label before the first underscore.
# Iterate over the Series directly: `Series.get_values()` was removed in pandas 1.0.
sample_list = [tile_label.split('_')[0] for tile_label in df['exp']['sample_Xtile_Ytile']]
ser_sample = pd.Series(sample_list, name='sample_slide', index=df['exp'].index.tolist())

# Distinct slide names, sorted; computed once and reused for the count.
list_slides = sorted(set(sample_list))
print(len(list_slides))
list_slides

# Attach the slide name to every row and show how many rows each slide contributes.
df['exp']['sample_slide'] = ser_sample
ser_sample.value_counts()
df['exp'].shape
df['exp']['Z.Z'].hist()
# Snapshot of the current column names.
cols = list(df['exp'].columns)
cols

# Columns at positions 1 through 29 are kept under the name exp_cols.
exp_cols = cols[1:30]
exp_cols

# For every column, remember its distinct values and print how many there are.
unique_dict = {}
for inst_col in cols:
    inst_list_unique = list(df['exp'][inst_col].unique())
    inst_num_unique = len(inst_list_unique)
    unique_dict[inst_col] = inst_list_unique
    print(inst_col, inst_num_unique)
# BALBc: normal tissue. MRL/lpr: spleen from animals with systemic autoimmune disease.
# Start with tile 'BALBc-1_X01_Y01'.
# Gather the row labels of every row whose tile label is one of the selected tiles.
ser_tile = df['exp']['sample_Xtile_Ytile']
keep_rows = []
for inst_tile in ['BALBc-1_X01_Y01']:
    ser_found = ser_tile.loc[ser_tile == inst_tile]
    keep_rows += ser_found.index.tolist()

# Keep only those rows and transpose, so the selected rows become columns.
df['tile'] = df['exp'].loc[keep_rows].T
df['tile'].shape
# Translate each column's 'Imaging phenotype cluster ID' value through ct_dict.
cats = [ct_dict[cluster_id] for cluster_id in df['tile'].loc['Imaging phenotype cluster ID']]

# Relabel every column as a (previous label, 'Cell Type: <category>') tuple.
cols = df['tile'].columns.tolist()
new_cols = [(inst_name, 'Cell Type: ' + str(inst_cat)) for inst_name, inst_cat in zip(cols, cats)]
df['tile'].columns = new_cols
df['tile'].shape
df['tile'].head()
# Keep only the rows of the tile table that are listed in exp_cols.
df['tile-exp-ini'] = df['tile'].loc[exp_cols]
df['tile-exp-ini'].shape
# Per-column total (summed down the rows), sorted from largest to smallest.
ser_sum = df['tile-exp-ini'].sum(axis=0).sort_values(ascending=False)
ser_sum.plot(grid=True)
print(ser_sum.shape)
# Discard columns whose total is 100000 or more ...
ser_sum = ser_sum[ser_sum < 100000]
print(ser_sum.shape)
# ... and columns whose total is not strictly positive.
ser_sum = ser_sum[ser_sum > 0]
print(ser_sum.shape)
# Column labels that survived both filters; reused below to select columns.
keep_cells = ser_sum.index.tolist()
df['tile-exp'] = df['tile-exp-ini'][keep_cells]
df['tile-exp'].shape
# Overwrite negative entries with 0 (in place, via a boolean mask).
df['tile-exp'][df['tile-exp'] < 0] = 0
df['tile-exp'].transpose().describe()
# df['tile-exp-ash'] = np.arcsinh(df['tile-exp']/5)
# Cap entries above 5000 at 5000 (in place, via a boolean mask).
df['tile-exp'][df['tile-exp'] > 5000] = 5000
# ser_vals = pd.Series(df['tile-exp'].get_values().flatten())
# ser_vals.hist(bins=100)
df['tile-exp'].shape
df['tile'].loc['Z.Z'].head()

# Take the 'X.X' and 'Y.Y' rows for the retained columns and transpose them,
# giving one row per retained column with the two coordinates as columns.
df['tile-loc'] = df['tile'].loc[['X.X', 'Y.Y']][keep_cells].T
df['tile-loc'].shape

# Mirror the Y coordinate about 1000, then cast both coordinates to integers.
df['tile-loc']['Y.Y'] = 1000 - df['tile-loc']['Y.Y']
df['tile-loc'] = df['tile-loc'].astype('int')
def set_expression_opacity(inst_gene, expression=None, mark=None):
    """Set a mark's per-point opacities from one row of an expression table.

    Every value in the selected row is divided by the row maximum, so the
    point with the highest value gets opacity 1.0. If the row has no positive
    maximum (all zeros, all NaN, or empty) every opacity is set to 0.0
    instead of dividing by zero.

    Args:
        inst_gene: Row label to look up in the expression table.
        expression: Table whose rows are selected with ``.loc[inst_gene]``.
            Defaults to the module-level ``df['tile-exp']``.
        mark: Object whose ``default_opacities`` attribute is assigned.
            Defaults to the module-level ``scatter`` that is bound at call
            time.

    Raises:
        KeyError: If ``inst_gene`` is not a row label of the table.
    """
    if expression is None:
        expression = df['tile-exp']
    if mark is None:
        mark = scatter

    # `.get_values()` was removed in pandas 1.0; work on the Series directly.
    ser_opacity = expression.loc[inst_gene].astype(float)

    # Compute the maximum once rather than once per point.
    max_value = ser_opacity.max()
    if max_value > 0:
        list_opacity = (ser_opacity / max_value).tolist()
    else:
        # NaN compares False here too, so all-NaN and empty rows land in this branch.
        list_opacity = [0.0] * len(ser_opacity)

    mark.default_opacities = list_opacity
# Open a new figure and create a tooltip bound to each point's 'name' field.
fig = plt.figure(title='Scatter')
def_tt = bqplot.Tooltip(fields=['name'], formats=[''])

# One black-outlined marker per row of the location table, named by its row label.
scatter = plt.scatter(
    df['tile-loc']['X.X'],
    df['tile-loc']['Y.Y'],
    figsize=(20, 10),
    ylim=(0, 1000),
    xlim=(0, 1000),
    stroke='black',
    tooltip=def_tt,
    names=df['tile-loc'].index.tolist(),
    display_names=False,
)

# Minimum on-screen size: inst_width pixels wide, inst_width / 1.2 pixels tall.
inst_width = 900
fig.layout.min_width = '{}px'.format(inst_width)
fig.layout.min_height = '{}px'.format(inst_width / 1.2)

# Marker styling: opacity from the 'NKp46' row, size 100, red.
set_expression_opacity('NKp46')
scatter.default_size = 100
scatter.colors = ['red']
# Load the expression table into clustergrammer and build its widget.
net.load_df(df['tile-exp'])
net.widget()

# Re-key the first column-category colour map: each key is split on ': ' and
# the second piece becomes the new key.
ini_colors = net.viz['cat_colors']['col']['cat-0']
cat_colors = {inst_key.split(': ')[1]: ini_colors[inst_key] for inst_key in ini_colors}
cat_colors
fig
# Drive the scatter opacity from the 'CD45' row
# (other rows tried here: 'CD106', 'CD44', 'NKp46', 'IgD', 'IgM').
ser_opacity = df['tile-exp'].loc['CD45'].astype(float)

# `Series.get_values()` was removed in pandas 1.0, so scale the Series directly;
# the maximum is computed once instead of once per point.
max_opacity = ser_opacity.max()
if max_opacity > 0:
    list_opacity = (ser_opacity / max_opacity).tolist()
else:
    # No positive value in the row: use 0.0 everywhere rather than dividing by zero.
    list_opacity = [0.0] * len(ser_opacity)
scatter.default_opacities = list_opacity
from scipy.spatial import Voronoi
# Voronoi diagram of the points in the location table.
vor = Voronoi(df['tile-loc'])

# Each row label is a (name, '<prefix>: <category>') pair; split the pairs into
# a list of names and a parallel list of bare categories.
point_list = df['tile-loc'].index.tolist()
point_names = [inst_name for inst_name, _ in point_list]
cat_names = [inst_label.split(': ')[1] for _, inst_label in point_list]
len(cat_names)
point_names[0]
# Parallel lists describing one polygon per usable Voronoi region.
patch_data = {'x': [], 'y': [], 'colors': []}
region_labels = []

# Region index -> index of an input point that owns it (the last point wins if
# several points report the same region index).
region_point_dict = {
    region_index: point_index
    for point_index, region_index in enumerate(vor.point_region)
}

for region_index, inst_region in enumerate(vor.regions):
    # Skip empty regions and regions that contain the vertex index -1.
    if len(inst_region) == 0 or -1 in inst_region:
        continue

    # Label and colour the polygon by the category of its owning point.
    point_cat = cat_names[region_point_dict[region_index]]
    region_labels.append(point_cat)
    patch_data['colors'].append(cat_colors[point_cat])

    # Polygon outline: the coordinates of the region's vertices, in order.
    region_vertices = [vor.vertices[inst_vertex] for inst_vertex in inst_region]
    patch_data['x'].append([inst_pos[0] for inst_pos in region_vertices])
    patch_data['y'].append([inst_pos[1] for inst_pos in region_vertices])
import bqplot.pyplot as plt
# New figure with a one-second animation duration.
fig = plt.figure(animation_duration=1000)

# Closed, filled polygons (initially empty) with hidden axes; one fill colour
# and one label per region.
patch = plt.plot(
    [],
    [],
    fill='inside',
    fill_colors=patch_data['colors'],
    stroke_width=1,
    close_path=True,
    labels=region_labels,
    tooltip=def_tt,
    axes_options={'x': {'visible': False}, 'y': {'visible': False}},
)

# Small markers for the points themselves, drawn on the same figure.
scatter = plt.scatter(
    df['tile-loc']['X.X'],
    df['tile-loc']['Y.Y'],
    tooltip=def_tt,
    names=point_names,
    display_names=False,
    default_size=2,
)

# Minimum on-screen size: inst_width pixels wide, inst_width / 1.2 pixels tall.
inst_width = 1000
fig.layout.min_width = '{}px'.format(inst_width)
fig.layout.min_height = '{}px'.format(inst_width / 1.2)

# Supply the polygon outlines, then fix both axes to the 0-1000 range.
patch.x = patch_data['x']
patch.y = patch_data['y']
plt.xlim(0, 1000)
plt.ylim(0, 1000)
fig
# Rebuild the polygon lists, this time with every region coloured red.
patch_data = {'x': [], 'y': [], 'colors': []}
for inst_region in vor.regions:
    # Skip empty regions and regions that contain the vertex index -1.
    if len(inst_region) == 0 or -1 in inst_region:
        continue
    region_vertices = [vor.vertices[inst_vertex] for inst_vertex in inst_region]
    patch_data['x'].append([inst_pos[0] for inst_pos in region_vertices])
    patch_data['y'].append([inst_pos[1] for inst_pos in region_vertices])
    patch_data['colors'].append('red')

# New figure with a one-second animation duration and closed, filled polygons
# (initially empty) on hidden axes.
fig = plt.figure(animation_duration=1000)
patch = plt.plot(
    [],
    [],
    fill_colors=patch_data['colors'],
    fill='inside',
    axes_options={'x': {'visible': False}, 'y': {'visible': False}},
    stroke_width=1,
    close_path=True,
    tooltip=def_tt,
)

# patch.x / patch.y each take one list of coordinates per polygon.
patch.x = patch_data['x']
patch.y = patch_data['y']
plt.xlim(0, 1000)
plt.ylim(0, 1000)
fig